# setwd("/Volumes/home/greally-lab/Claudia_Andrew/CRISPR_Proj_combined")
# load in libraries
library(DESeq2)
library(EDASeq)
library(matrixStats)
library(RUVSeq)
library(qvalue)
library(genefilter)
library(RColorBrewer)
library(pheatmap)
library(UpSetR)
library(RFmarkerDetector)
library(ggplot2)
library(ggthemes)
# set options
options(scipen=999, stringsAsFactors = FALSE)
The goal of this study was to 1) assess the robust activation of CD34 upon transient transfection of HEK 293T cells with a CRISPR-deadCas9-VP128 and 2) to assess the potential off-target effects of this method. We perform a simple differential expression analysis among our three treatments_r1: control, tranfection with dCas9-VP128 w/o gRNAs, and transfection with dCas9-VP128 and gRNAs targetted to the promoter of CD34. We perform analysis of each replicate separately and then combined.
The RNA-seq libraries were prepared similarly with ERCC spike-ins. However, the spike-ins could not be used to normalize the libraries due to their low count number; this most likely is due to the spike-ins not being added at a high enough concentration. Another important caveat to the libraries is that in the first replicate the control library was generated from cells passed through the FACS machine, whereas the in second replicate, the control cells were not passed through the FACS machine.
First we read in the two biological replicates (and their two technical reps for each treatment).
# reading in and merging the counts
## selecting the files ot read in
files <- grep(pattern = "ReadsPerGene.out.tab", x = list.files(), value = TRUE)
files
## [1] "1A.1ReadsPerGene.out.tab" "1minus.1ReadsPerGene.out.tab"
## [3] "2Aplus.1ReadsPerGene.out.tab" "2plus.1ReadsPerGene.out.tab"
## [5] "3AQ2.1ReadsPerGene.out.tab" "3plus.1ReadsPerGene.out.tab"
## [7] "CD34_1.1ReadsPerGene.out.tab" "CD34_2.1ReadsPerGene.out.tab"
## [9] "CRISPR_1.1ReadsPerGene.out.tab" "CRISPR_2.1ReadsPerGene.out.tab"
## [11] "Ctrl_1.1ReadsPerGene.out.tab" "Ctrl_2.1ReadsPerGene.out.tab"
list_counts <- list()
for (i in 1:length(files)){
list_counts[[i]] <- read.table(paste(files[i]))
if (i < 2) {
df_counts <- list_counts[[1]][,c(1,4)]
}
else {
df_counts <- merge(df_counts, list_counts[[i]][,c(1,4)], by = "V1")
}
}
dim(df_counts) # 60700 13
## [1] 60700 13
## remove the ambiguous, multimapp, no feature, and unmapped read totals
df_counts <- df_counts[-c(60697:60700),]
rownames(df_counts) <- df_counts[,1]
df_counts <- df_counts[,-1]
colnames(df_counts) <- c("Ctrl_1_1", "Ctrl_1_2", "CRISPR_1_1", "CRISPR_1_2", "CD34_1_1",
"CD34_1_2", "CD34_2_1", "CD34_2_2", "CRISPR_2_1","CRISPR_2_2",
"Ctrl_2_1", "Ctrl_2_2")
head(df_counts)
## Ctrl_1_1 Ctrl_1_2 CRISPR_1_1 CRISPR_1_2 CD34_1_1 CD34_1_2
## ENSG00000000003 1915 2792 1970 1439 1937 2008
## ENSG00000000005 0 3 0 1 0 0
## ENSG00000000419 601 770 1036 693 873 924
## ENSG00000000457 342 389 397 234 302 352
## ENSG00000000460 637 867 786 504 670 663
## ENSG00000000938 1 0 2 0 1 1
## CD34_2_1 CD34_2_2 CRISPR_2_1 CRISPR_2_2 Ctrl_2_1 Ctrl_2_2
## ENSG00000000003 2104 1654 1935 2562 2205 2708
## ENSG00000000005 0 0 1 0 0 1
## ENSG00000000419 1037 774 982 1227 696 762
## ENSG00000000457 341 278 311 453 288 396
## ENSG00000000460 796 571 739 896 683 803
## ENSG00000000938 1 0 7 0 0 0
Next we filter the RNAs to be analzyed. First, we apply a simple filter for only those RNAs that are expressed at high levels. The RNA must have at least 5 counts in four of the samples, thus allowing only genes expressed by only one treatment group to be retained. Next, we filter for protein coding genes only or protein coding and long non-coding RNAs.
# expression filter
idx_filt_exp_com <- apply(df_counts, 1, function(x) length(x[x>5])>=4)
head(idx_filt_exp_com)
## ENSG00000000003 ENSG00000000005 ENSG00000000419 ENSG00000000457
## TRUE FALSE TRUE TRUE
## ENSG00000000460 ENSG00000000938
## TRUE FALSE
filtered_com <- df_counts[idx_filt_exp_com,]
dim(filtered_com) # 19,244 12
## [1] 19244 12
# remove spike ins
spikes_com <- grep("ERCC", rownames(filtered_com))
length(spikes_com) # 12
## [1] 12
filterd_noSpike_com <- filtered_com[-spikes_com,]
dim(filterd_noSpike_com) # 19232 12
## [1] 19232 12
# filter for only protein coding RNAs
prot_ensg_ID <- read.table("../../indexes/Hg38_rel79_ERCC/prot_ENSG.txt")
dim(prot_ensg_ID) # 22002 1
## [1] 22002 1
filterd_noSpike_pc_com <- filterd_noSpike_com[
rownames(filterd_noSpike_com) %in% prot_ensg_ID$V1,]
dim(filterd_noSpike_pc_com) # 14260 6
## [1] 14260 12
Let’s look at the PCA and RLE plots.
# resorting the columns so that controls, CRISPRs, and CD34s are next to each other
filterd_noSpike_pc_com <- filterd_noSpike_pc_com[,c(1,2,11,12,3,4,9,10,5,6,7,8)]
# set a factor for different treatments
treatments_com <- as.factor(rep(c("Ctrl","CRISPR","CD34"), each=4))
treatments_com <- relevel(treatments_com, c("Ctrl"))
replicates_com <- as.factor(rep(c("Rep1", "Rep2", "Rep1", "Rep2", "Rep1", "Rep2"),each=2))
# create expression sets
eset_pc_com <- newSeqExpressionSet(as.matrix(filterd_noSpike_pc_com),
phenoData = data.frame(treatments_com,
row.names=colnames(filterd_noSpike_pc_com)))
# choose a color set
colors_com <- brewer.pal(6, "Dark2")
colors <- brewer.pal(3, "Dark2")
# Make RLE plots
plotRLE(eset_pc_com, outline=FALSE, ylim=c(-4, 4), col=colors[treatments_com],
main="Protein coding RNAs before normalization")
limma::plotMDS(counts(eset_pc_com), dim=c(2,3))
# Make PCA plots
plotPCA(eset_pc_com, col=colors[treatments_com], cex=1.2,
main = "Protein coding RNAs before normalization")
plotPCA(eset_pc_com, k=3, col=colors[treatments_com], cex=1.2,
main="Protein coding RNAs before normalization")
plotPCA(eset_pc_com, k=3, col=colors[replicates_com], cex=1.2,
main="Protein coding RNAs before normalization")
The different treatment groups cluster together as expected except for CRISPR_1_1. Also PC3 seems to be driven by replicate (1v2).
Next we normalize based on housekeeping gene expression. House keeping genes were identified by a previous study “Human housekeeping genes revisited” E. Eisenberg and E.Y. Levanon, Trends in Genetics, 29 (2013) and a list is avaialble for download at (https://www.tau.ac.il/~elieis/HKG/). We took only the bottom quartile variance house keeping genes to use for normalization.
# read in house keeping genes
HK_genes <- read.table("HK_ensembl_ID.txt")
dim(HK_genes) # 4202 1
## [1] 4202 1
# grab the HK genes from RNAs being analyzed
HK_pc_com <- filterd_noSpike_pc_com[which(rownames(filterd_noSpike_pc_com) %in% HK_genes[,1]),]
dim(HK_pc_com) # 3753 12
## [1] 3753 12
# examine the variance of the HK genes and take only the bottom 1000 genes to normalize with
## for protein coding RNAs only
HK_pc_com_rsd <- apply(as.matrix(HK_pc_com), 1, rsd)
boxplot(HK_pc_com_rsd)
summary(HK_pc_com_rsd)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 12.62 18.24 20.33 21.56 23.32 83.66
HK_pc_lowRSD <- sort(HK_pc_com_rsd)[1:1000]
# Normalize using the house keeping genes
eset_pc_norm_com <- RUVg(eset_pc_com, names(HK_pc_lowRSD), k=1)
# The weights have been added to the phenotype data
pData(eset_pc_norm_com)
## treatments_com W_1
## Ctrl_1_1 Ctrl -0.23181281
## Ctrl_1_2 Ctrl 0.30140064
## Ctrl_2_1 Ctrl -0.15068670
## Ctrl_2_2 Ctrl 0.19543575
## CRISPR_1_1 CRISPR 0.23101365
## CRISPR_1_2 CRISPR -0.50824987
## CRISPR_2_1 CRISPR -0.00221291
## CRISPR_2_2 CRISPR 0.51117416
## CD34_1_1 CD34 -0.00075488
## CD34_1_2 CD34 0.07351361
## CD34_2_1 CD34 0.04398612
## CD34_2_2 CD34 -0.46280677
# Make RLE plots
plotRLE(eset_pc_norm_com, outline=FALSE, ylim=c(-4, 4), col=colors[treatments_com],
main="Protein coding RNAs after normalization")
# Make PCA plots
plotPCA(eset_pc_norm_com, col=colors[treatments_com], cex=1.2,
main = "Protein coding RNAs after normalization")
plotPCA(eset_pc_norm_com, k=3, col=colors[treatments_com], cex=1.2,
main="Protein coding RNAs after normalization")
Now PC1 captures the differences between the transfected and non-transfected treatments. PC2 is now based upon replicate. PC3 captures the differences among the treatments perfectly. We will continue with this since the replicate will be included in the DEseq model.
Next, we perform the differential expression among the different treatments
#adding the replicates information to the pData
pData(eset_pc_norm_com) <- cbind(pData(eset_pc_norm_com), replicates_com)
# convert the expression set to a DESeq object
dds_pc_com <- DESeqDataSetFromMatrix(countData = counts(eset_pc_norm_com),
colData = pData(eset_pc_norm_com),
design = ~W_1 + replicates_com + treatments_com)
# Run DESeq Wald tests
dds_pc_com <- DESeq(dds_pc_com)
# generate results among the different treatments_com and set a log fold change threshold of 1
ruv_res_con_CD34_pc_com <- results(dds_pc_com, lfcThreshold=1, altHypothesis="greaterAbs",
contrast = c("treatments_com", "CD34", "Ctrl"), alpha=0.05)
ruv_res_con_crisp_pc_com <- results(dds_pc_com, lfcThreshold=1, altHypothesis="greaterAbs",
contrast = c("treatments_com", "CRISPR", "Ctrl"), alpha=0.05)
ruv_res_crisp_CD34_pc_com <- results(dds_pc_com, lfcThreshold=1, altHypothesis="greaterAbs",
contrast = c("treatments_com", "CD34", "CRISPR"), alpha=0.05)
# draw MA plots of results
## draw horizontal lines for log fold change threshold
drawLines <- function() abline(h=c(-1,1),col="dodgerblue",lwd=2)
ylim<-c(-8,8)
##draw the MA plots
DESeq2::plotMA(ruv_res_con_CD34_pc_com,
main="RUV Ctrl vs CD34 PC-com", ylim=ylim); drawLines()
DESeq2::plotMA(ruv_res_con_crisp_pc_com,
main="RUV Ctrl vs CRISPR PC-com", ylim=ylim); drawLines()
DESeq2::plotMA(ruv_res_crisp_CD34_pc_com,
main="RUV CRISPR vs CD34 PC-com", ylim=ylim); drawLines()
# ENSG00000174059 is CD34 ENSG ID
ruv_res_con_CD34_pc_com["ENSG00000174059",]
## log2 fold change (MLE): treatments_com CD34 vs Ctrl
## Wald test p-value: treatments com CD34 vs Ctrl
## DataFrame with 1 row and 6 columns
## baseMean log2FoldChange lfcSE stat
## <numeric> <numeric> <numeric> <numeric>
## ENSG00000174059 310.1427 5.60014 0.1811934 25.38801
## pvalue
## <numeric>
## ENSG00000174059 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003420907
## padj
## <numeric>
## ENSG00000174059 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004878214
ruv_res_con_crisp_pc_com["ENSG00000174059",]
## log2 fold change (MLE): treatments_com CRISPR vs Ctrl
## Wald test p-value: treatments com CRISPR vs Ctrl
## DataFrame with 1 row and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSG00000174059 310.1427 -1.236545 0.3046578 -0.77643 0.4374951
## padj
## <numeric>
## ENSG00000174059 1
ruv_res_crisp_CD34_pc_com["ENSG00000174059",]
## log2 fold change (MLE): treatments_com CD34 vs CRISPR
## Wald test p-value: treatments_com CD34 vs CRISPR
## DataFrame with 1 row and 6 columns
## baseMean log2FoldChange lfcSE stat
## <numeric> <numeric> <numeric> <numeric>
## ENSG00000174059 310.1427 6.836686 0.2658514 21.95469
## pvalue
## <numeric>
## ENSG00000174059 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007810719
## padj
## <numeric>
## ENSG00000174059 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001113808
# looking at the
summary(ruv_res_con_CD34_pc_com) # 116 up and 36 down
##
## out of 14260 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 123, 0.86%
## LFC < 0 (down) : 38, 0.27%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 3)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
summary(ruv_res_con_crisp_pc_com) # 104 up and 22 down
##
## out of 14260 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 103, 0.72%
## LFC < 0 (down) : 22, 0.15%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 3)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
summary(ruv_res_crisp_CD34_pc_com) # only CD34 up!
##
## out of 14260 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 1, 0.007%
## LFC < 0 (down) : 0, 0%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 3)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# grabbing the ENSG IDs from the differentially expressed genes
ruv_res_con_CD34_pc_com_nona <- ruv_res_con_CD34_pc_com[!is.na(ruv_res_con_CD34_pc_com$padj),]
ruv_res_con_CD34_pc_com_IDs <-
rownames(ruv_res_con_CD34_pc_com_nona)[ruv_res_con_CD34_pc_com_nona$padj<0.05]
length(ruv_res_con_CD34_pc_com_IDs) #161
## [1] 161
num_CD34Vctrl <- length(ruv_res_con_CD34_pc_com_IDs)
ruv_res_con_crisp_pc_com_IDs <-
rownames(ruv_res_con_crisp_pc_com)[ruv_res_con_crisp_pc_com$padj<0.05]
length(ruv_res_con_crisp_pc_com_IDs) #125
## [1] 125
num_crispVctrl <- length(ruv_res_con_crisp_pc_com_IDs)
sum(ruv_res_con_CD34_pc_com_IDs %in% ruv_res_con_crisp_pc_com_IDs) # 97 are the same
## [1] 97
num_overlaps <- sum(ruv_res_con_CD34_pc_com_IDs %in% ruv_res_con_crisp_pc_com_IDs)
ruv_res_pc_com_IDs <- ruv_res_con_CD34_pc_com_IDs[(ruv_res_con_CD34_pc_com_IDs %in% ruv_res_con_crisp_pc_com_IDs)]
ruv_res_con_CD34_pc_com_sig <- as.data.frame(ruv_res_con_CD34_pc_com[rownames(ruv_res_con_CD34_pc_com) %in% ruv_res_pc_com_IDs,])
ruv_res_con_CD34_pc_com_sig$Ensembl_ID <- rownames(ruv_res_con_CD34_pc_com_sig)
ruv_res_con_crisp_pc_com_sig <- as.data.frame(ruv_res_con_crisp_pc_com[rownames(ruv_res_con_crisp_pc_com) %in% ruv_res_pc_com_IDs,])
ruv_res_con_crisp_pc_com_sig$Ensembl_ID <- rownames(ruv_res_con_crisp_pc_com_sig)
ruv_res_con_C34_CRISPR_pc_com_sig <- merge(x = ruv_res_con_CD34_pc_com_sig,
y = ruv_res_con_crisp_pc_com_sig[,-1],
by = "Ensembl_ID", all=T)
write.table(ruv_res_con_C34_CRISPR_pc_com_sig, "ruv_res_con_C34_CRISPR_pc_com_sig2.txt",
row.names = F, col.names = TRUE, sep="\t", append=F, quote = F)
write.csv(ruv_res_con_C34_CRISPR_pc_com_sig, "ruv_res_con_C34_CRISPR_pc_com_sig2.txt",
row.names = F, col.names = TRUE, quote = F)
Let’s make an upset plot with the combined and the individual replicates.
# Setting up the dataframe for the upset plot.
ruv_res_con_CD34_pc_com_IDs_upsetdf <- data.frame(Gene=ruv_res_con_CD34_pc_com_IDs,
Com_ctrl_v_CD34=1)
ruv_res_con_crisp_pc_com_IDs_upsetdf <- data.frame(Gene=ruv_res_con_crisp_pc_com_IDs,
Com_ctrl_v_CRISPR=1)
Combined_upsetdf <- merge(ruv_res_con_CD34_pc_com_IDs_upsetdf,
ruv_res_con_crisp_pc_com_IDs_upsetdf,
by="Gene", all=T)
dim(Combined_upsetdf) # 189 3
## [1] 189 3
Combined_upsetdf_nona <- replace(Combined_upsetdf,is.na(Combined_upsetdf),0)
sum(duplicated(Combined_upsetdf_nona$Gene))
## [1] 0
upset(Combined_upsetdf_nona, nsets = 2, number.angles = 0, point.size = 3.5,
line.size = 1.5, mainbar.y.label = "DE Gene Intersections",
sets.x.label = "# of DE Genes", text.scale = c(1.3, 1, 1, 1, 1, 1), order.by = "freq")
# I would look at the gene ontology enrichment of the combined overlapping 97 genes.
saveRDS(ruv_res_pc_com_IDs, "ruv_res_pc_com_IDs2.rds")
ruv_res_pc_com_IDs
## [1] "ENSG00000006128" "ENSG00000006327" "ENSG00000008517"
## [4] "ENSG00000031698" "ENSG00000046774" "ENSG00000049249"
## [7] "ENSG00000052749" "ENSG00000070669" "ENSG00000077009"
## [10] "ENSG00000077150" "ENSG00000078237" "ENSG00000087074"
## [13] "ENSG00000087245" "ENSG00000095370" "ENSG00000099860"
## [16] "ENSG00000101255" "ENSG00000104112" "ENSG00000104856"
## [19] "ENSG00000105499" "ENSG00000109089" "ENSG00000111981"
## [22] "ENSG00000115129" "ENSG00000115414" "ENSG00000115461"
## [25] "ENSG00000119922" "ENSG00000120738" "ENSG00000122733"
## [28] "ENSG00000124194" "ENSG00000124253" "ENSG00000128965"
## [31] "ENSG00000130513" "ENSG00000133169" "ENSG00000134986"
## [34] "ENSG00000135423" "ENSG00000136235" "ENSG00000137090"
## [37] "ENSG00000138271" "ENSG00000139292" "ENSG00000139364"
## [40] "ENSG00000141756" "ENSG00000144452" "ENSG00000145147"
## [43] "ENSG00000147257" "ENSG00000148677" "ENSG00000149131"
## [46] "ENSG00000151012" "ENSG00000152137" "ENSG00000153233"
## [49] "ENSG00000153879" "ENSG00000155495" "ENSG00000159307"
## [52] "ENSG00000160712" "ENSG00000161011" "ENSG00000163121"
## [55] "ENSG00000163661" "ENSG00000165323" "ENSG00000165655"
## [58] "ENSG00000166900" "ENSG00000167767" "ENSG00000168003"
## [61] "ENSG00000168394" "ENSG00000168542" "ENSG00000169393"
## [64] "ENSG00000169429" "ENSG00000169896" "ENSG00000171223"
## [67] "ENSG00000171951" "ENSG00000173093" "ENSG00000173276"
## [70] "ENSG00000175197" "ENSG00000175592" "ENSG00000175868"
## [73] "ENSG00000176046" "ENSG00000176406" "ENSG00000176746"
## [76] "ENSG00000179046" "ENSG00000180447" "ENSG00000181026"
## [79] "ENSG00000182916" "ENSG00000183696" "ENSG00000185437"
## [82] "ENSG00000186115" "ENSG00000186340" "ENSG00000186529"
## [85] "ENSG00000187608" "ENSG00000187775" "ENSG00000187955"
## [88] "ENSG00000188211" "ENSG00000196878" "ENSG00000197355"
## [91] "ENSG00000197558" "ENSG00000214212" "ENSG00000223638"
## [94] "ENSG00000229292" "ENSG00000243955" "ENSG00000254415"
## [97] "ENSG00000265972"
any(ruv_res_pc_com_IDs == "ENSG00000116721")
## [1] FALSE
Making a diagram of the overlap between treatment comparisons
library(VennDiagram)
grid.newpage()
draw.pairwise.venn(num_crispVctrl, num_CD34Vctrl, num_overlaps,
category = c("CRISPR v. Ctrl", "CD34 v. Ctrl"),
lty = rep("blank", 2), fill = c("#7570B3", "#D95F02"),
alpha = rep(0.5, 2), cat.pos = c(0, 0),
cat.dist = rep(0.025, 2))
## (polygon[GRID.polygon.103], polygon[GRID.polygon.104], polygon[GRID.polygon.105], polygon[GRID.polygon.106], text[GRID.text.107], text[GRID.text.108], text[GRID.text.109], text[GRID.text.110], text[GRID.text.111])
pdf("Compare_VD_1.pdf", width=6, height = 4, family="ArialMT")
draw.pairwise.venn(num_crispVctrl, num_CD34Vctrl, num_overlaps,
category = c("CRISPR v. Ctrl", "CD34 v. Ctrl"),
lty = rep("blank", 2), fill = c("#7570B3", "#D95F02"),
alpha = rep(0.5, 2), cat.pos = c(0, 0),
cat.dist = rep(0.025, 2))
## (polygon[GRID.polygon.112], polygon[GRID.polygon.113], polygon[GRID.polygon.114], polygon[GRID.polygon.115], text[GRID.text.116], text[GRID.text.117], text[GRID.text.118], text[GRID.text.119], text[GRID.text.120])
dev.off()
## quartz_off_screen
## 2
Checking to make sure that the chosen qPCR validation targets are within the 97 genes overlapping between the comparisons.
# in order: NFKB2, TRIB3, RelB, EGR1, JUNB, DDIT3, fosl1
qPCR_IDs <- c("ENSG00000077150", "ENSG00000101255", "ENSG00000104856",
"ENSG00000120738", "ENSG00000171223", "ENSG00000175197",
"ENSG00000175592")
length(qPCR_IDs) # 7
## [1] 7
# check IDs
sum(ruv_res_pc_com_IDs %in% qPCR_IDs) #7
## [1] 7
Tried to manipulate the MAplot function code to make figures.. but it didn’t work. This next code chunk is not evaluated but is kept for scratch work.
t_col <- function(color, percent = 50, name = NULL) {
# color = color name
# percent = % transparency
# name = an optional name for the color
## Get RGB values for named color
rgb.val <- col2rgb(color)
## Make new color using input color as base and alpha set by transparency
t.col <- rgb(rgb.val[1], rgb.val[2], rgb.val[3],
max = 255,
alpha = (100-percent)*255/100,
names = name)
## Save the color
invisible(t.col)
}
lt_black <- t_col("black", percent = 50, name = "black_t")
# red for CD34
# purple for similar?
# blue for significant?
# CRISPR sig #7570B3
# CD34 sig #D95F02
plotMA_AJ <- function( object, ylim = NULL,
colNonSig = "gray32", colSig = "red3", colLine = "#ff000080",
log = "x", cex=0.45, xlab="mean expression", ylab="log fold change", ... )
{
if( !( ncol(object) == 3 & inherits( object[[1]], "numeric" ) & inherits( object[[2]], "numeric" )
& inherits( object[[3]], "logical" ) ) ) {
stop( "When called with a data.frame, plotMA expects the data frame to have 3 columns, two numeric ones for mean and log fold change, and a logical one for significance.")
}
colnames(object) <- c( "mean", "lfc", "sig" )
object = subset( object, mean != 0 )
py = object$lfc
if( is.null(ylim) )
ylim = c(-1,1) * quantile(abs(py[is.finite(py)]), probs=0.99) * 1.1
plot(object$mean, pmax(ylim[1], pmin(ylim[2], py)),
log=log, pch=ifelse(py<ylim[1], 6, ifelse(py>ylim[2], 2, 16)),
cex=cex, col=ifelse( object$sig, colSig, colNonSig ), xlab=xlab, ylab=ylab, ylim=ylim, ...)
abline( h=0, lwd=4, col= lt_black )
}
colNonSig = "gray32"
ylim <- c(-6,6)
# color non-sig as gray
col_ruv_res_con_CD34_pc_com <- rep(t_col("gray32", percent = 10, name = "lt.gray32"), nrow(ruv_res_con_CD34_pc_com))
head(col_ruv_res_con_CD34_pc_com)
# color CD34 sig as orange
col_ruv_res_con_CD34_pc_com[which(ruv_res_con_CD34_pc_com$padj < 0.05)] <- "#D95F02"
# color shared sig as deep purple
col_ruv_res_con_CD34_pc_com[rownames(ruv_res_con_CD34_pc_com) %in% ruv_res_pc_com_IDs] <- "#551a8b"
# color qPCR targets green
col_ruv_res_con_CD34_pc_com[rownames(ruv_res_con_CD34_pc_com) %in% qPCR_IDs] <- "#006633"
# color CD34 sig as red
col_ruv_res_con_CD34_pc_com[rownames(ruv_res_con_CD34_pc_com) %in% "ENSG00000174059"] <- "red3"
pch_ruv_res_con_CD34_pc_com <- rep(20, nrow(ruv_res_con_CD34_pc_com))
pch_ruv_res_con_CD34_pc_com[col_ruv_res_con_CD34_pc_com != col_ruv_res_con_CD34_pc_com[1]] <- 19
pch_ruv_res_con_CD34_pc_com[rownames(ruv_res_con_CD34_pc_com) %in% qPCR_IDs] <- 17
plot(ruv_res_con_CD34_pc_com$baseMean, pmax(ylim[1], pmin(ylim[2], ruv_res_con_CD34_pc_com$log2FoldChange)),
log="x", pch=pch_ruv_res_con_CD34_pc_com,
cex=0.45, col=col_ruv_res_con_CD34_pc_com,
xlab="mean expression",
ylab="log fold change",
ylim=ylim)
abline(h=0, lwd=4, col=lt_black)
drawLines <- function() abline(h=c(-1,1),col="dodgerblue",lwd=2)
head(ruv_res_con_CD34_pc_com)
DESeq2::plotMA(ruv_res_con_CD34_pc_com,
main="RUV Ctrl vs CD34 PC-com", ylim=ylim); drawLines()
Using the base plotting package, I didn’t have enough control over the size of the points (as well as the transparency), so I’ll make the MA plot using the ggplot package.
head(ruv_res_con_CD34_pc_com)
## log2 fold change (MLE): treatments_com CD34 vs Ctrl
## Wald test p-value: treatments com CD34 vs Ctrl
## DataFrame with 6 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSG00000000003 2063.928940 -0.13312402 0.04538688 0 1
## ENSG00000000419 856.072924 0.52185225 0.05863385 0 1
## ENSG00000000457 334.052490 0.03406567 0.08978050 0 1
## ENSG00000000460 706.150361 0.03311635 0.06495760 0 1
## ENSG00000000971 9.708977 -0.37529870 0.50982271 0 1
## ENSG00000001036 1407.097579 0.36360258 0.04733630 0 1
## padj
## <numeric>
## ENSG00000000003 1
## ENSG00000000419 1
## ENSG00000000457 1
## ENSG00000000460 1
## ENSG00000000971 1
## ENSG00000001036 1
color_genes <- c("#000000", "#FF0000", "#0432FF", "#FF40FF", "#548235", "#FF9300", "#942093", "#00FDFF")
# color CD34 red
ruv_res_con_CD34_pc_com$color <- 1
ruv_res_con_CD34_pc_com$color[rownames(ruv_res_con_CD34_pc_com)=="ENSG00000174059"] <- 2
# make non-DE transparent
ruv_res_con_CD34_pc_com$trans <- 0.1
ruv_res_con_CD34_pc_com$trans[which(ruv_res_con_CD34_pc_com$padj < 0.05)] <- 1
# make CD34 larger
ruv_res_con_CD34_pc_com$size <- 1
ruv_res_con_CD34_pc_com$size[rownames(ruv_res_con_CD34_pc_com)=="ENSG00000174059"] <- 1.5
ruv_res_con_CD34_pc_com_df <- as.data.frame(ruv_res_con_CD34_pc_com)
head(ruv_res_con_CD34_pc_com_df)
## baseMean log2FoldChange lfcSE stat pvalue padj
## ENSG00000000003 2063.928940 -0.13312402 0.04538688 0 1 1
## ENSG00000000419 856.072924 0.52185225 0.05863385 0 1 1
## ENSG00000000457 334.052490 0.03406567 0.08978050 0 1 1
## ENSG00000000460 706.150361 0.03311635 0.06495760 0 1 1
## ENSG00000000971 9.708977 -0.37529870 0.50982271 0 1 1
## ENSG00000001036 1407.097579 0.36360258 0.04733630 0 1 1
## color trans size
## ENSG00000000003 1 0.1 1
## ENSG00000000419 1 0.1 1
## ENSG00000000457 1 0.1 1
## ENSG00000000460 1 0.1 1
## ENSG00000000971 1 0.1 1
## ENSG00000001036 1 0.1 1
str(ruv_res_con_CD34_pc_com_df)
## 'data.frame': 14260 obs. of 9 variables:
## $ baseMean : num 2063.93 856.07 334.05 706.15 9.71 ...
## $ log2FoldChange: num -0.1331 0.5219 0.0341 0.0331 -0.3753 ...
## $ lfcSE : num 0.0454 0.0586 0.0898 0.065 0.5098 ...
## $ stat : num 0 0 0 0 0 0 0 0 0 0 ...
## $ pvalue : num 1 1 1 1 1 1 1 1 1 1 ...
## $ padj : num 1 1 1 1 1 1 1 1 1 1 ...
## $ color : num 1 1 1 1 1 1 1 1 1 1 ...
## $ trans : num 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ...
## $ size : num 1 1 1 1 1 1 1 1 1 1 ...
ruv_res_con_CD34_pc_com_df$color <- factor(ruv_res_con_CD34_pc_com_df$color)
MAplot_Ctrl_v_CD34 <- ggplot(data = ruv_res_con_CD34_pc_com_df, aes(x = baseMean, y = log2FoldChange, color = color, alpha = trans, size=size)) +
geom_point() +
scale_x_continuous(trans = 'log10') +
scale_alpha_continuous(guide=FALSE) +
scale_size_continuous(guide=FALSE, range = c(1,3)) +
scale_color_manual(values=c("black", "red"), guide=FALSE) +
ylim(c(-7, 7)) +
geom_hline(yintercept=c(-1,1), linetype="dotted") +
xlab("mean expression") +
ylab("log fold change") +
ggtitle("Control vs. CD34") +
theme_tufte()
MAplot_Ctrl_v_CD34
pdf("MAplot_Ctrl_v_CD34.pdf", width=6, height = 4, family="ArialMT")
MAplot_Ctrl_v_CD34
dev.off()
## quartz_off_screen
## 2
Now for the Control vs. CRISPR plot
# color CD34 red
ruv_res_con_crisp_pc_com$color <- 1
ruv_res_con_crisp_pc_com$color[rownames(ruv_res_con_crisp_pc_com)=="ENSG00000174059"] <- 2
# make non-DE transparent
ruv_res_con_crisp_pc_com$trans <- 0.1
ruv_res_con_crisp_pc_com$trans[which(ruv_res_con_crisp_pc_com$padj < 0.05)] <- 1
# make CD34 larger
ruv_res_con_crisp_pc_com$size <- 1
ruv_res_con_crisp_pc_com$size[rownames(ruv_res_con_crisp_pc_com)=="ENSG00000174059"] <- 1.5
ruv_res_con_crisp_pc_com_df <- as.data.frame(ruv_res_con_crisp_pc_com)
head(ruv_res_con_crisp_pc_com_df)
## baseMean log2FoldChange lfcSE stat pvalue padj
## ENSG00000000003 2063.928940 -0.258230887 0.04483725 0 1 1
## ENSG00000000419 856.072924 0.498139331 0.05758999 0 1 1
## ENSG00000000457 334.052490 -0.009779321 0.08815098 0 1 1
## ENSG00000000460 706.150361 -0.011057270 0.06383935 0 1 1
## ENSG00000000971 9.708977 0.062959951 0.48445166 0 1 1
## ENSG00000001036 1407.097579 0.209524611 0.04680729 0 1 1
## color trans size
## ENSG00000000003 1 0.1 1
## ENSG00000000419 1 0.1 1
## ENSG00000000457 1 0.1 1
## ENSG00000000460 1 0.1 1
## ENSG00000000971 1 0.1 1
## ENSG00000001036 1 0.1 1
str(ruv_res_con_crisp_pc_com_df)
## 'data.frame': 14260 obs. of 9 variables:
## $ baseMean : num 2063.93 856.07 334.05 706.15 9.71 ...
## $ log2FoldChange: num -0.25823 0.49814 -0.00978 -0.01106 0.06296 ...
## $ lfcSE : num 0.0448 0.0576 0.0882 0.0638 0.4845 ...
## $ stat : num 0 0 0 0 0 0 0 0 0 0 ...
## $ pvalue : num 1 1 1 1 1 1 1 1 1 1 ...
## $ padj : num 1 1 1 1 1 1 1 1 1 1 ...
## $ color : num 1 1 1 1 1 1 1 1 1 1 ...
## $ trans : num 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ...
## $ size : num 1 1 1 1 1 1 1 1 1 1 ...
ruv_res_con_crisp_pc_com_df$color <- factor(ruv_res_con_crisp_pc_com_df$color)
MAplot_Ctrl_v_CRISPR <- ggplot(data = ruv_res_con_crisp_pc_com_df, aes(x = baseMean, y = log2FoldChange, color = color, alpha = trans, size=size)) +
geom_point() +
scale_x_continuous(trans = 'log10') +
scale_alpha_continuous(guide=FALSE) +
scale_size_continuous(guide=FALSE, range = c(1,3)) +
scale_color_manual(values=c("black", "red"), guide=FALSE) +
ylim(c(-7, 7)) +
geom_hline(yintercept=c(-1,1), linetype="dotted") +
xlab("mean expression") +
ylab("log fold change") +
ggtitle("Control vs. CRISPR") +
theme_tufte()
MAplot_Ctrl_v_CRISPR
pdf("MAplot_Ctrl_v_CRISPR.pdf", width=6, height = 4, family="ArialMT")
MAplot_Ctrl_v_CRISPR
dev.off()
## quartz_off_screen
## 2
# remove CD34 highlight
ruv_res_con_crisp_pc_com$size[rownames(ruv_res_con_crisp_pc_com)=="ENSG00000174059"] <- 1
ruv_res_con_crisp_pc_com$color[rownames(ruv_res_con_crisp_pc_com)=="ENSG00000174059"] <- 1
ruv_res_con_crisp_pc_com_df <- as.data.frame(ruv_res_con_crisp_pc_com)
MAplot_Ctrl_v_CRISPR2 <- ggplot(data = ruv_res_con_crisp_pc_com_df, aes(x = baseMean, y = log2FoldChange, color = as.factor(color), alpha = trans)) +
geom_point() +
scale_x_continuous(trans = 'log10') +
scale_alpha_continuous(guide=FALSE) +
scale_color_manual(values=c("black", "red"), guide=FALSE) +
ylim(c(-7, 7)) +
geom_hline(yintercept=c(-1,1), linetype="dotted") +
xlab("mean expression") +
ylab("log fold change") +
ggtitle("Control vs. CRISPR") +
theme_tufte()
MAplot_Ctrl_v_CRISPR2
pdf("MAplot_Ctrl_v_CRISPR2.pdf", width=6, height = 4, family="ArialMT")
MAplot_Ctrl_v_CRISPR2
dev.off()
## quartz_off_screen
## 2
CRISPR vs. CD34 plot
# color CD34 red
ruv_res_crisp_CD34_pc_com$color <- 1
ruv_res_crisp_CD34_pc_com$color[rownames(ruv_res_crisp_CD34_pc_com)=="ENSG00000174059"] <- 2
# make non-DE transparent
ruv_res_crisp_CD34_pc_com$trans <- 0.1
ruv_res_crisp_CD34_pc_com$trans[which(ruv_res_crisp_CD34_pc_com$padj < 0.05)] <- 1
# make CD34 larger
ruv_res_crisp_CD34_pc_com$size <- 1
ruv_res_crisp_CD34_pc_com$size[rownames(ruv_res_crisp_CD34_pc_com)=="ENSG00000174059"] <- 1.5
ruv_res_crisp_CD34_pc_com_df <- as.data.frame(ruv_res_crisp_CD34_pc_com)
head(ruv_res_crisp_CD34_pc_com_df)
## baseMean log2FoldChange lfcSE stat pvalue padj
## ENSG00000000003 2063.928940 0.12510687 0.04655009 0 1 1
## ENSG00000000419 856.072924 0.02371291 0.05771032 0 1 1
## ENSG00000000457 334.052490 0.04384499 0.09139576 0 1 1
## ENSG00000000460 706.150361 0.04417362 0.06611236 0 1 1
## ENSG00000000971 9.708977 -0.43825865 0.51390673 0 1 1
## ENSG00000001036 1407.097579 0.15407797 0.04753871 0 1 1
## color trans size
## ENSG00000000003 1 0.1 1
## ENSG00000000419 1 0.1 1
## ENSG00000000457 1 0.1 1
## ENSG00000000460 1 0.1 1
## ENSG00000000971 1 0.1 1
## ENSG00000001036 1 0.1 1
str(ruv_res_crisp_CD34_pc_com_df)
## 'data.frame': 14260 obs. of 9 variables:
## $ baseMean : num 2063.93 856.07 334.05 706.15 9.71 ...
## $ log2FoldChange: num 0.1251 0.0237 0.0438 0.0442 -0.4383 ...
## $ lfcSE : num 0.0466 0.0577 0.0914 0.0661 0.5139 ...
## $ stat : num 0 0 0 0 0 0 0 0 0 0 ...
## $ pvalue : num 1 1 1 1 1 1 1 1 1 1 ...
## $ padj : num 1 1 1 1 1 1 1 1 1 1 ...
## $ color : num 1 1 1 1 1 1 1 1 1 1 ...
## $ trans : num 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ...
## $ size : num 1 1 1 1 1 1 1 1 1 1 ...
ruv_res_crisp_CD34_pc_com_df$color <- factor(ruv_res_crisp_CD34_pc_com_df$color)
MAplot_CRSIPR_v_CD34 <- ggplot(data = ruv_res_crisp_CD34_pc_com_df, aes(x = baseMean, y = log2FoldChange, color = color, alpha = trans, size=size)) +
geom_point() +
scale_x_continuous(trans = 'log10') +
scale_alpha_continuous(guide=FALSE) +
scale_size_continuous(guide=FALSE, range = c(1,3)) +
scale_color_manual(values=c("black", "red"), guide=FALSE) +
ylim(c(-7, 7)) +
geom_hline(yintercept=c(-1,1), linetype="dotted") +
xlab("mean expression") +
ylab("log fold change") +
ggtitle("CRISPR vs. CD34") +
theme_tufte()
MAplot_CRSIPR_v_CD34
pdf("MAplot_CRSIPR_v_CD34.pdf", width=6, height = 4, family="ArialMT")
MAplot_CRSIPR_v_CD34
dev.off()
## quartz_off_screen
## 2
Now to MAplots highlighting the six genes that claudia is testing.
# color genes
ruv_res_con_crisp_pc_com$color <- 1
ruv_res_con_crisp_pc_com$color[rownames(ruv_res_con_crisp_pc_com)=="ENSG00000174059"] <- 2
ruv_res_con_crisp_pc_com$color[rownames(ruv_res_con_crisp_pc_com)=="ENSG00000104856"] <- 3
ruv_res_con_crisp_pc_com$color[rownames(ruv_res_con_crisp_pc_com)=="ENSG00000077150"] <- 4
ruv_res_con_crisp_pc_com$color[rownames(ruv_res_con_crisp_pc_com)=="ENSG00000175197"] <- 5
ruv_res_con_crisp_pc_com$color[rownames(ruv_res_con_crisp_pc_com)=="ENSG00000171223"] <- 6
ruv_res_con_crisp_pc_com$color[rownames(ruv_res_con_crisp_pc_com)=="ENSG00000175592"] <- 7
ruv_res_con_crisp_pc_com$color[rownames(ruv_res_con_crisp_pc_com)=="ENSG00000101255"] <- 8
# make non-DE transparent
ruv_res_con_crisp_pc_com$trans <- 0.1
ruv_res_con_crisp_pc_com$trans[which(ruv_res_con_crisp_pc_com$padj < 0.05)] <- 1
# make CD34/genes larger
ruv_res_con_crisp_pc_com$size <- 1
GOI <- c("ENSG00000174059", "ENSG00000101255", "ENSG00000077150", "ENSG00000175197",
"ENSG00000171223", "ENSG00000175592", "ENSG00000104856")
ruv_res_con_crisp_pc_com$size[rownames(ruv_res_con_crisp_pc_com) %in% GOI] <- 1.5
ruv_res_con_crisp_pc_com_df <- as.data.frame(ruv_res_con_crisp_pc_com)
head(ruv_res_con_crisp_pc_com_df)
## baseMean log2FoldChange lfcSE stat pvalue padj
## ENSG00000000003 2063.928940 -0.258230887 0.04483725 0 1 1
## ENSG00000000419 856.072924 0.498139331 0.05758999 0 1 1
## ENSG00000000457 334.052490 -0.009779321 0.08815098 0 1 1
## ENSG00000000460 706.150361 -0.011057270 0.06383935 0 1 1
## ENSG00000000971 9.708977 0.062959951 0.48445166 0 1 1
## ENSG00000001036 1407.097579 0.209524611 0.04680729 0 1 1
## color trans size
## ENSG00000000003 1 0.1 1
## ENSG00000000419 1 0.1 1
## ENSG00000000457 1 0.1 1
## ENSG00000000460 1 0.1 1
## ENSG00000000971 1 0.1 1
## ENSG00000001036 1 0.1 1
str(ruv_res_con_crisp_pc_com_df)
## 'data.frame': 14260 obs. of 9 variables:
## $ baseMean : num 2063.93 856.07 334.05 706.15 9.71 ...
## $ log2FoldChange: num -0.25823 0.49814 -0.00978 -0.01106 0.06296 ...
## $ lfcSE : num 0.0448 0.0576 0.0882 0.0638 0.4845 ...
## $ stat : num 0 0 0 0 0 0 0 0 0 0 ...
## $ pvalue : num 1 1 1 1 1 1 1 1 1 1 ...
## $ padj : num 1 1 1 1 1 1 1 1 1 1 ...
## $ color : num 1 1 1 1 1 1 1 1 1 1 ...
## $ trans : num 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ...
## $ size : num 1 1 1 1 1 1 1 1 1 1 ...
ruv_res_con_crisp_pc_com_df$color <- factor(ruv_res_con_crisp_pc_com_df$color)
levels(ruv_res_con_crisp_pc_com_df$color)
## [1] "1" "2" "3" "4" "5" "6" "7" "8"
MAplot_Ctrl_v_CRISPR_qPCR <- ggplot(data = ruv_res_con_crisp_pc_com_df, aes(x = baseMean, y = log2FoldChange, color = color, alpha = trans, size=size)) +
geom_point() +
scale_x_continuous(trans = 'log10') +
scale_alpha_continuous(guide=FALSE) +
scale_size_continuous(guide=FALSE, range = c(1,3)) +
scale_color_manual(values=color_genes, guide=FALSE) +
ylim(c(-7, 7)) +
geom_hline(yintercept=c(-1,1), linetype="dotted") +
xlab("mean expression") +
ylab("log fold change") +
ggtitle("Control vs. CRISPR") +
theme_tufte()
MAplot_Ctrl_v_CRISPR_qPCR
pdf("MAplot_Ctrl_v_CRISPR_qPCR.pdf", width=6, height = 4, family="ArialMT")
MAplot_Ctrl_v_CRISPR_qPCR
dev.off()
## quartz_off_screen
## 2
Now to MAplots highlighting the six genes that claudia is testing.
# color genes
ruv_res_con_CD34_pc_com$color <- 1
ruv_res_con_CD34_pc_com$color[rownames(ruv_res_con_CD34_pc_com)=="ENSG00000174059"] <- 2
ruv_res_con_CD34_pc_com$color[rownames(ruv_res_con_CD34_pc_com)=="ENSG00000104856"] <- 3
ruv_res_con_CD34_pc_com$color[rownames(ruv_res_con_CD34_pc_com)=="ENSG00000077150"] <- 4
ruv_res_con_CD34_pc_com$color[rownames(ruv_res_con_CD34_pc_com)=="ENSG00000175197"] <- 5
ruv_res_con_CD34_pc_com$color[rownames(ruv_res_con_CD34_pc_com)=="ENSG00000171223"] <- 6
ruv_res_con_CD34_pc_com$color[rownames(ruv_res_con_CD34_pc_com)=="ENSG00000175592"] <- 7
ruv_res_con_CD34_pc_com$color[rownames(ruv_res_con_CD34_pc_com)=="ENSG00000101255"] <- 8
# make non-DE transparent
ruv_res_con_CD34_pc_com$trans <- 0.1
ruv_res_con_CD34_pc_com$trans[which(ruv_res_con_CD34_pc_com$padj < 0.05)] <- 1
# make CD34/genes larger
ruv_res_con_CD34_pc_com$size <- 1
GOI <- c("ENSG00000174059", "ENSG00000101255", "ENSG00000077150", "ENSG00000175197",
"ENSG00000171223", "ENSG00000175592", "ENSG00000104856")
ruv_res_con_CD34_pc_com$size[rownames(ruv_res_con_CD34_pc_com) %in% GOI] <- 1.5
ruv_res_con_CD34_pc_com_df <- as.data.frame(ruv_res_con_CD34_pc_com)
head(ruv_res_con_CD34_pc_com_df)
## baseMean log2FoldChange lfcSE stat pvalue padj
## ENSG00000000003 2063.928940 -0.13312402 0.04538688 0 1 1
## ENSG00000000419 856.072924 0.52185225 0.05863385 0 1 1
## ENSG00000000457 334.052490 0.03406567 0.08978050 0 1 1
## ENSG00000000460 706.150361 0.03311635 0.06495760 0 1 1
## ENSG00000000971 9.708977 -0.37529870 0.50982271 0 1 1
## ENSG00000001036 1407.097579 0.36360258 0.04733630 0 1 1
## color trans size
## ENSG00000000003 1 0.1 1
## ENSG00000000419 1 0.1 1
## ENSG00000000457 1 0.1 1
## ENSG00000000460 1 0.1 1
## ENSG00000000971 1 0.1 1
## ENSG00000001036 1 0.1 1
str(ruv_res_con_CD34_pc_com_df)
## 'data.frame': 14260 obs. of 9 variables:
## $ baseMean : num 2063.93 856.07 334.05 706.15 9.71 ...
## $ log2FoldChange: num -0.1331 0.5219 0.0341 0.0331 -0.3753 ...
## $ lfcSE : num 0.0454 0.0586 0.0898 0.065 0.5098 ...
## $ stat : num 0 0 0 0 0 0 0 0 0 0 ...
## $ pvalue : num 1 1 1 1 1 1 1 1 1 1 ...
## $ padj : num 1 1 1 1 1 1 1 1 1 1 ...
## $ color : num 1 1 1 1 1 1 1 1 1 1 ...
## $ trans : num 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ...
## $ size : num 1 1 1 1 1 1 1 1 1 1 ...
ruv_res_con_CD34_pc_com_df$color <- factor(ruv_res_con_CD34_pc_com_df$color)
levels(ruv_res_con_CD34_pc_com_df$color)
## [1] "1" "2" "3" "4" "5" "6" "7" "8"
MAplot_Ctrl_v_CD34_qPCR <- ggplot(data = ruv_res_con_CD34_pc_com_df, aes(x = baseMean, y = log2FoldChange, color = color, alpha = trans, size=size)) +
geom_point() +
scale_x_continuous(trans = 'log10') +
scale_alpha_continuous(guide=FALSE) +
scale_size_continuous(guide=FALSE, range = c(1,3)) +
scale_color_manual(values=color_genes, guide=FALSE) +
ylim(c(-7, 7)) +
geom_hline(yintercept=c(-1,1), linetype="dotted") +
xlab("mean expression") +
ylab("log fold change") +
ggtitle("Control vs. CD34") +
theme_tufte()
MAplot_Ctrl_v_CD34_qPCR
pdf("MAplot_Ctrl_v_CD34_qPCR.pdf", width=6, height = 4, family="ArialMT")
MAplot_Ctrl_v_CD34_qPCR
dev.off()
## quartz_off_screen
## 2
Outputting the Session Info
sessionInfo()
## R version 3.4.1 (2017-06-30)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: OS X El Capitan 10.11.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid stats4 parallel stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] VennDiagram_1.6.18 futile.logger_1.4.3
## [3] ggthemes_3.4.0 ggplot2_2.2.1
## [5] RFmarkerDetector_1.0.1 AUCRF_1.1
## [7] randomForest_4.6-12 UpSetR_1.3.3
## [9] pheatmap_1.0.8 RColorBrewer_1.1-2
## [11] genefilter_1.58.1 qvalue_2.8.0
## [13] RUVSeq_1.10.0 edgeR_3.18.1
## [15] limma_3.32.10 EDASeq_2.10.0
## [17] ShortRead_1.34.2 GenomicAlignments_1.12.2
## [19] Rsamtools_1.28.0 Biostrings_2.44.2
## [21] XVector_0.16.0 BiocParallel_1.10.1
## [23] DESeq2_1.16.1 SummarizedExperiment_1.6.5
## [25] DelayedArray_0.2.7 matrixStats_0.52.2
## [27] Biobase_2.36.2 GenomicRanges_1.28.6
## [29] GenomeInfoDb_1.12.3 IRanges_2.10.5
## [31] S4Vectors_0.14.7 BiocGenerics_0.22.1
##
## loaded via a namespace (and not attached):
## [1] colorspace_1.3-2 hwriter_1.3.2
## [3] rprojroot_1.2 htmlTable_1.11.0
## [5] base64enc_0.1-3 rstudioapi_0.7
## [7] bit64_0.9-7 AnnotationDbi_1.38.2
## [9] codetools_0.2-15 splines_3.4.1
## [11] R.methodsS3_1.7.1 DESeq_1.28.0
## [13] WilcoxCV_1.0-2 geneplotter_1.54.0
## [15] knitr_1.17 Formula_1.2-2
## [17] annotate_1.54.0 cluster_2.0.6
## [19] R.oo_1.21.0 compiler_3.4.1
## [21] httr_1.3.1 backports_1.1.1
## [23] assertthat_0.2.0 Matrix_1.2-12
## [25] lazyeval_0.2.1 acepack_1.4.1
## [27] htmltools_0.3.6 prettyunits_1.0.2
## [29] tools_3.4.1 bindrcpp_0.2
## [31] gtable_0.2.0 glue_1.2.0
## [33] GenomeInfoDbData_0.99.0 reshape2_1.4.2
## [35] dplyr_0.7.4 Rcpp_0.12.14
## [37] gdata_2.18.0 UsingR_2.0-5
## [39] rtracklayer_1.36.6 stringr_1.2.0
## [41] gtools_3.5.0 XML_3.98-1.9
## [43] HistData_0.8-2 zlibbioc_1.22.0
## [45] MASS_7.3-47 scales_0.5.0.9000
## [47] aroma.light_3.6.0 lambda.r_1.2
## [49] yaml_2.1.15 memoise_1.1.0
## [51] gridExtra_2.3 biomaRt_2.35.8
## [53] rpart_4.1-11 latticeExtra_0.6-28
## [55] stringi_1.1.6 RSQLite_2.0
## [57] checkmate_1.8.5 GenomicFeatures_1.28.5
## [59] caTools_1.17.1 rlang_0.1.4
## [61] pkgconfig_2.0.1 bitops_1.0-6
## [63] evaluate_0.10.1 lattice_0.20-35
## [65] ROCR_1.0-7 purrr_0.2.4
## [67] bindr_0.1 labeling_0.3
## [69] htmlwidgets_0.9 bit_1.1-12
## [71] plyr_1.8.4 magrittr_1.5
## [73] R6_2.2.2 gplots_3.0.1
## [75] Hmisc_4.0-3 DBI_0.7
## [77] foreign_0.8-69 survival_2.41-3
## [79] RCurl_1.95-4.10 nnet_7.3-12
## [81] tibble_1.3.4 futile.options_1.0.0
## [83] KernSmooth_2.23-15 rmarkdown_1.8
## [85] progress_1.1.2 locfit_1.5-9.1
## [87] data.table_1.10.4-3 blob_1.1.0
## [89] digest_0.6.12 xtable_1.8-2
## [91] tidyr_0.7.2 R.utils_2.6.0
## [93] munsell_0.4.3